/****************************************************************************/
/*This program opens the tornado data keeps the counties we will use and    */
/*grabs only the businesses that are in these counties                      */
/****************************************************************************/
 
#delimit;
clear;
set matsize 800;
set more off;
capture log close;
set trace off;

/****************************************************************************/
/* DEFINE THE PROJECT FOLDER                                                */
/****************************************************************************/

local pathsmr  C:\Research;
local datapath	 G:\infogroup\CSV_state;

/****************************************************************************/
/*DEFINE PATH TO FOLDERS                                                    */
/****************************************************************************/

local Torpath  `pathsmr'\Tornadoes;

/****************************************************************************/
/*Open tornado data and create a list of counties affected by tornados      */
/****************************************************************************/

use `Torpath'\Data\tornado_data_cleaned_0515MilesBuffer.dta.dta, clear;

gen stcty = substr(bgfips,1,5);
gen st = substr(bgfips,1,2);

gen temp = 1;

sort stcty;

collapse (sum) temp, by(stcty);   

sort stcty;
save `Torpath'\Data\tornadoes_stcty_intensity.dta, replace;

/****************************************************************************/
/*Open, clean and merge business data with tornado data to get a clean business data set*/
/****************************************************************************/

/*******************************************/
/*Start Year Loop                          */
/*******************************************/

foreach i of numlist 1997(1)2017 { ;

/*******************************************/
/*Start state Loop                         */
/*******************************************/

*foreach k of numlist 1 2 4 5 6 8(1)13 15(1)42 44(1)51 53 54 55 56 { ;
foreach k of numlist 1 5 13 17(1)22 24 28 29 31 39 40 47 48 { ;

display "state `k'";
display "year `i'";

clear;
insheet using "`datapath'\year`i'\infogroup_bus_`i'_`k'.csv";

/*******************************************/
/*Data Cleaning                            */
/*******************************************/

capture destring slsvdt, force replace;
capture drop if cntycd ==.;
capture drop acrflg ssic1 trdate parentsalesvolumecode;
 
if `i' >= 2005 {;
capture drop if year == "A";
capture drop if year == "A0";
capture drop if year == "A1";
capture drop if year == "A8";
capture drop if year == "A9";
capture drop if year == "AA";
capture drop if year == "AB";
capture drop if year == "AC";
capture drop if year == "AU";
capture drop if year == "B";
capture drop if year == "B0";
capture drop if year == "B1";
capture drop if year == "B8";
capture drop if year == "B9";
capture drop if year == "BA";
capture drop if year == "BB";
capture drop if year == "BC";
capture drop if year == "C";
capture drop if year == "C0";
capture drop if year == "C1";
capture drop if year == "C8";
capture drop if year == "C9";
capture drop if year == "CA";
capture drop if year == "CB";
capture drop if year == "CC";
capture drop if year == "D0";
capture drop if year == "D1";
capture drop if year == "D8";
capture drop if year == "D9";
capture drop if year == "DA";
capture drop if year == "DB";
capture drop if year == "DC";
capture drop if year == "U";
capture drop if year == "U0";
capture drop if year == "U8";
capture drop if year == "U9";
capture drop if year == "UA";
capture drop if year == "UB";
capture drop if year == "UC";
capture drop if year == "I0";

capture destring year, replace;

capture replace empsdt = "0" if empsdt == "F";
capture replace empsdt = "0" if empsdt == "M";
capture replace empsdt = "0" if empsdt == "1M";
capture replace empsdt = "0" if empsdt == "3M";

capture gen temp1 = substr(empsdt,1,1);
capture replace empsdt = substr(empsdt,2,.) if temp1 == "F";
capture replace empsdt = substr(empsdt,2,.) if temp1 == "M";
capture drop temp1;

capture gen temp2 = substr(empsdt,2,1);
capture replace empsdt = "0" if temp2 == "F";
capture replace empsdt = "0" if temp2 == "M";
capture drop temp2;

capture destring empsdt, replace;

};

if `i' < 2011 {;
gen year_temp = 1984 if year == 84;
replace year_temp = 1985 if year == 85;
replace year_temp = 1986 if year == 86;
replace year_temp = 1987 if year == 87;
replace year_temp = 1988 if year == 88;
replace year_temp = 1989 if year == 89;
replace year_temp = 1990 if year == 90;
replace year_temp = 1991 if year == 91;
replace year_temp = 1992 if year == 92;
replace year_temp = 1993 if year == 93;
replace year_temp = 1994 if year == 94;
replace year_temp = 1995 if year == 95;
replace year_temp = 1996 if year == 96;
replace year_temp = 1997 if year == 97;
replace year_temp = 1998 if year == 98;
replace year_temp = 1999 if year == 99;
replace year_temp = 2000 if year == 0;
replace year_temp = 2001 if year == 1;
replace year_temp = 2002 if year == 2;
replace year_temp = 2003 if year == 3;
replace year_temp = 2004 if year == 4;
replace year_temp = 2005 if year == 5;
replace year_temp = 2006 if year == 6;
replace year_temp = 2007 if year == 7;
replace year_temp = 2008 if year == 8;
replace year_temp = 2009 if year == 9;
replace year_temp = 2010 if year == 10;
replace year_temp = 2011 if year == 11;
replace year_temp = 2012 if year == 12;
replace year_temp = 2013 if year == 13;
replace year_temp = 2014 if year == 14;
replace year_temp = 2015 if year == 15;
replace year_temp = 2016 if year == 16;
replace year_temp = 2017 if year == 17;

drop year;
rename year_temp year;
};

gen str st_str = string(stcode);
replace st_str = "0" + st_str if length(st_str) ==1;

/*infogroup changed a bunch of variable names */
if `i' == 2015 | `i' == 2016 | `i' == 2017 {;
rename zipcode zip;
rename countycode cntycd;
rename primarysiccode prmsic;
};

gen str cty_str = string(cntycd);
replace cty_str = "0" + cty_str if length(cty_str) ==2;
replace cty_str = "00" + cty_str if length(cty_str) ==1;

gen stcty = st_str + cty_str;


gen str zipcode = string(zip);
replace zipcode = "0" + zipcode if length(zipcode) ==4;
replace zipcode = "00" + zipcode if length(zipcode) ==3;
replace zipcode = "000" + zipcode if length(zipcode) ==2;
replace zipcode = "0000" + zipcode if length(zipcode) ==1;


if `i' < 2010 {;
gen str sic_str = string(sic);
gen sic2code = substr(sic_str,1,2) if length(sic_str) == 6;
replace sic2code = "0" + substr(sic_str,1,1) if length(sic_str) == 5;
drop sic_str;
};

if `i' >= 2010 {;
gen str sic_str = string(prmsic);
gen sic2code = substr(sic_str,1,2) if length(sic_str) == 6;
replace sic2code = "0" + substr(sic_str,1,1) if length(sic_str) == 5;
drop sic_str;
};

if `i' == 2008 & `k' == 38 {;
gen str str_yearet = string(yearet);
drop yearet;
rename str_yearet yearet;
};

if `i' == 2009 & `k' == 38 {;
gen str str_yearet = string(yearet);
drop yearet;
rename str_yearet yearet;
};

if `i' == 2010 & `k' == 38 {;
gen str str_secondary_zip_code = string(secondary_zip_code);
drop secondary_zip_code;
rename str_secondary_zip_code secondary_zip_code;
};

capture drop trdate;
capture drop prof;
capture drop offsiz;
capture drop genfld;
capture drop contcd;
capture drop co2srt;
capture drop pempsz;
capture drop ssic1;
capture drop acrflg;
capture drop pid;
capture drop endrec;
capture drop egcode;
capture drop phone;
capture drop parentsalesvolumecode;


gen num_bus = 1;

capture destring lat, replace;

gen datayear = `i';

sort stcty;
merge m:1 stcty using `Torpath'\Data\tornadoes_stcty_intensity.dta;
tab _merge;

keep if _merge ==3;

drop _merge;

if `k' == 1 {;
save `Torpath'\Data\business_level_`i'_long.dta, replace;
};

if `k' > 1 {;
append using `Torpath'\Data\business_level_`i'_long.dta;
save `Torpath'\Data\business_level_`i'_long.dta, replace; 
};


};  /*Close State Loop*/

outsheet using `Torpath'\Data\business_level_`i'_long.csv, replace;

*outsheet using `Torpath'\Data\business_level_`i'_missing_cty.csv if stcty == "13015" | stcty == "13129" | stcty == "13171" | 
                 stcty == "13199" | stcty == "22065" | stcty == "28019" | stcty == "28051" | stcty == "28055" | stcty == "28105" | 
                 stcty == "28125" | stcty == "28149" | stcty == "28163" | stcty == "47051" | stcty == "47075" | stcty == "47077" | stcty == "47103", replace;

*outsheet using `Torpath'\Data\business_level_`i'_missing2_cty.csv if stcty == "13191" | stcty == "17059" | stcty == "17099" | 
                 stcty == "17105" | stcty == "17165" | stcty == "17189" | stcty == "20009" | stcty == "20097" | stcty == "20099" | 
                 stcty == "21107" | stcty == "22127" | stcty == "28057" | stcty == "28129" | stcty == "31027" | stcty == "31051" | stcty == "31095" |
                 stcty == "31179" | stcty == "40035" | stcty == "40067" | stcty == "40115", replace;

*outsheet using `Torpath'\Data\business_level_`i'_missing3_cty.csv if stcty == "48337" | stcty == "18001", replace;

};  /*Close year Loop*/

log close;



